* Reset settings and initialize log file
* NOTE(review): `launch` is a project command that is not defined in this file;
* presumably it also defines the globals used below ($basepath, $col1, $sym1,
* ...) -- confirm against its definition.
launch, path("share/calendars")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Show cross-state differences in the timing of school closures.
*-------------------------------------------------------------------------------


* Map of each state's closure-timing classification
*-------------------------------------------------------------------------------

* Start from the state-level classification file
use "$basepath/data/derived/state_closure_timing.dta", clear

* Attach the shapefile attribute table. assert(2 3) stops the script if any
* classified state has no match; keep(3) then retains matched states only.
merge 1:1 state_fips using "$basepath/data/derived/state_db.dta", ///
	assert(2 3) keep(3) nogenerate

* Draw the map, leaving out Alaska (FIPS 2) and Hawaii (FIPS 15).
* clmethod(unique) gives every distinct value of closure_timing its own class,
* so the three fill colors line up with the three categories.
spmap closure_timing if !inlist(state_fips, 2, 15) ///
	using "$basepath/data/derived/state_coord.dta", ///
	id(_ID) ///
	title("") ///
	clmethod(unique) ///
	fcolor(ebblue*1.2 $col6 orange*0.8) ///
	osize(vthin) ///
	legtitle("") ///
	legend(size(*2.2))

* Export the figure (nicepdf is a project command -- options passed through as is)
nicepdf "$basepath/output/calendars_map.pdf", indirect replace

* Convert the PDF to a smaller .png copy with the `sips` command-line tool
shell sips -s format png "$basepath/output/calendars_map.pdf" --out "$basepath/output/calendars_map.png"


* Scatter plot comparing the two closure-timing measures
*-------------------------------------------------------------------------------

* Teacher-based measure on the y axis, student-based measure on the x axis;
* one scatter layer per classification so each gets its own color and symbol.
local pair closure_share_teachers closure_share_students

twoway ///
	(scatter `pair' if closure_timing == 1, mcolor($col1) msymbol($sym1)) ///
	(scatter `pair' if closure_timing == 2, mcolor($dkgs) msymbol($sym2)) ///
	(scatter `pair' if closure_timing == 3, mcolor($col2) msymbol($sym3)) ///
	(function y = x, range(0 100) lcolor(black)), /// 45-degree reference line
	xtitle("Early closures based on student presence") ///
	xscale(range(0 100)) ///
	xlabel(0(20)100) ///
	ytitle("Early closures based on teacher presence") ///
	yscale(range(0 100) titlegap(*-5)) ///
	ylabel(0(20)100) ///
	xsize(5.5) ///
	ysize(5) ///
	plotregion(margin(l=0 b=0)) ///
	legend(rows(3) ring(0) position(10) size(*0.95) order(1 2 3) bmargin(b=0 r=0) ///
		label(1 "Classified as early closures") ///
		label(2 "Classified as mixed closures") ///
		label(3 "Classified as late closures"))

* Export the figure (nicepdf is a project command -- options passed through as is)
nicepdf "$basepath/output/calendars_scatter.pdf", indirect replace


* Show that our classification is fairly stable over our analysis period
*-------------------------------------------------------------------------------

* Compute the fraction of 16-year-olds enrolled in high school in each month,
* separately for each state and decade
gzuse "$basepath/data/derived/cps_bms_sample_allages.dta.gz", clear
keep if age == 16
gen byte students = (school_status == 1)
gen decade = .
replace decade = 1 if inrange(year, 1989, 1999)
replace decade = 2 if inrange(year, 2000, 2009)
replace decade = 3 if inrange(year, 2010, 2019)

* Observations outside 1989-2019 belong to none of the decades being compared.
* Drop them so that no missing-decade cell is carried into the collapse and
* the later reshape on decade.
drop if missing(decade)

gcollapse (mean) students [pw = wtfinl], by(state_fips decade month)

* Compute the share of the May-to-July enrollment drop that has occurred by June
keep if inlist(month, 5, 6, 7)
reshape wide students, i(state_fips decade) j(month)
gen closure_share = 100 * (students5 - students6)/(students5 - students7)

* Classify state x decades as having early, late, or mixed closures.
* Stata treats missing as larger than any number, so the ">=" test must exclude
* missing closure_share explicitly (zero May-to-July drop, or a month with no
* observations); otherwise such cells would be labelled "Early". They are left
* unclassified instead.
gen closure_timing = .
replace closure_timing = 1 if closure_share >= 66.67 & !missing(closure_share)
replace closure_timing = 2 if closure_share >= 33.33 & closure_share < 66.67
replace closure_timing = 3 if closure_share <  33.33

label define closure_timing_lbl 1 "Early school closures", add
label define closure_timing_lbl 2 "Mixed school closures", add
label define closure_timing_lbl 3 "Late school closures", add
label values closure_timing closure_timing_lbl

* Compare assignments across decades (one row per state, one column per decade)
keep state_fips decade closure_timing
reshape wide closure_timing, i(state_fips) j(decade)
tab closure_timing1 closure_timing3

* Count states with the same classification in the first and last decade, and
* in all three decades. Requiring a non-missing classification keeps two
* unclassified decades from being counted as agreement (missing == missing).
count if closure_timing1 == closure_timing3 & !missing(closure_timing1)
count if closure_timing1 == closure_timing2 & closure_timing2 == closure_timing3 & !missing(closure_timing1)

* Close the log file
* NOTE(review): `unlaunch` is a project command that is not defined in this
* file; presumably the counterpart of the `launch` call at the top -- confirm.
unlaunch
